# Importing Libraries
import folium
import pandas as pd
import geopandas as gpd
import numpy as np
import warnings
# Getting the data
arrest_table = pd.read_csv("https://cmsc320.github.io/files/BPD_Arrests.csv")
# Keep only arrests that have a recorded location. The explicit copy makes the
# filtered frame independent, so adding columns below is not a chained assignment.
arrest_table = arrest_table[pd.notnull(arrest_table["Location 1"])].copy()
# Split "Location 1" on the comma into two text columns. expand=True returns the
# pieces as a DataFrame; unpacking the `.str` accessor directly relied on
# deprecated iteration behaviour and fails on current pandas releases.
location_parts = arrest_table["Location 1"].str.split(",", expand=True)
# Strip the surrounding parentheses literally (regex=False: "(" and ")" are regex
# metacharacters and the default of `regex` differs between pandas versions).
arrest_table["lat"] = location_parts[0].str.replace("(", "", regex=False).astype(float)
arrest_table["long"] = location_parts[1].str.replace(")", "", regex=False).astype(float)
# Suppressing warnings (e.g. deprecation warnings) for the rest of the session.
# NOTE: this silences every warning category, not only deprecations.
warnings.filterwarnings('ignore')
arrest_table.head()
# Number of arrests per neighborhood.
# count() tallies the non-null entries of each column within every group, so the
# 'arrest' column of the result is the count of non-null 'arrest' values.
neighborhoods_counts = arrest_table.groupby('neighborhood').count()
# Take the 'arrest' counts, label them 'arrest_count', and move the neighborhood
# names out of the index into an ordinary column.
neighborhood_arrest_counts = (
    neighborhoods_counts['arrest']
    .rename('arrest_count')
    .reset_index()
)
neighborhood_arrest_counts.head()
| | neighborhood | arrest_count |
|---|---|---|
| 0 | Abell | 58 |
| 1 | Allendale | 296 |
| 2 | Arcadia | 75 |
| 3 | Arlington | 682 |
| 4 | Armistead Gardens | 142 |
# Check how many rows (one per neighborhood) the arrest-count table has,
# along with its column names, non-null counts and dtypes
neighborhood_arrest_counts.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 267 entries, 0 to 266 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 neighborhood 267 non-null object 1 arrest_count 267 non-null int64 dtypes: int64(1), object(1) memory usage: 4.3+ KB
# Baltimore neighborhood boundaries, published as GeoJSON
neighborhood_geo_url = 'https://opendata.arcgis.com/datasets/f63445d9528349febb81f0b89cd8be5b_0.geojson'
# Read the GeoJSON with geopandas and keep just the two columns used below:
# the neighborhood name and the geometry outlining that neighborhood
geoJSON_df = gpd.read_file(neighborhood_geo_url)[['name', 'geometry']]
geoJSON_df.head()
| | name | geometry |
|---|---|---|
| 0 | Abell | POLYGON ((-76.61113 39.32345, -76.61060 39.323... |
| 1 | Allendale | POLYGON ((-76.67263 39.29184, -76.67330 39.291... |
| 2 | Arcadia | POLYGON ((-76.56853 39.33595, -76.56859 39.336... |
| 3 | Arlington | POLYGON ((-76.68627 39.34791, -76.68601 39.347... |
| 4 | Armistead Gardens | POLYGON ((-76.55880 39.30646, -76.55860 39.306... |
# Check how many rows (one per neighborhood shape) the GeoJSON table has,
# for comparison with the number of neighborhoods in the arrest-count table
geoJSON_df.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 278 entries, 0 to 277 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 name 278 non-null object 1 geometry 278 non-null geometry dtypes: geometry(1), object(1) memory usage: 4.5+ KB
As we can see, the GeoJSON data has more neighborhoods than our arrest data. Let's see which neighborhood names in the arrest data have no matching name in the GeoJSON data.
# Names that appear in the arrest counts but not in the GeoJSON table.
# setdiff1d returns the sorted, unique values of the first array that are
# absent from the second.
arrest_hood_names = neighborhood_arrest_counts['neighborhood'].values
geo_hood_names = geoJSON_df['name'].values
missing_hoods = np.setdiff1d(arrest_hood_names, geo_hood_names)
print(missing_hoods)
['Baltimore-Linwood' 'Booth-Boyd' 'Chinquapin Park-Belvedere' 'Christopher' 'Claremont-Freedom' 'Glenham-Belford' 'Harford-Echodale/Perring Parkway' 'Joseph Lee' 'SBIC' 'Washington Village' 'Wilson Heights' 'Woodring']
After some manual inspection of each table, I found some pairs that used different names for the same area. However, there are still some remaining neighborhoods for which there is no data, so we will not include those on our map.
# Reconcile spellings so the same area carries the same name in both tables.
# Each mapping is {name as currently written: name to use instead}.
arrest_name_fixes = {
    'Booth-Boyd': 'Boyd-Booth',
    'Chinquapin Park-Belvedere': 'Belvedere',
    'SBIC': 'South Baltimore',
}
geo_name_fixes = {
    'Glenham-Belhar': 'Glenham-Belford',
    'Washington Village/Pigtown': 'Washington Village',
}
neighborhood_arrest_counts.replace(arrest_name_fixes, inplace=True)
geoJSON_df.replace(geo_name_fixes, inplace=True)
# Give the GeoJSON table the same key column name as the arrest counts
geoJSON_df = geoJSON_df.rename(columns={'name': 'neighborhood'})
# Attach the arrest counts to the neighborhood shapes. This is an inner join, so
# only neighborhoods present in both tables are kept.
merged_df = geoJSON_df.merge(
    neighborhood_arrest_counts,
    how='inner',
    on='neighborhood',
)
merged_df
| | neighborhood | geometry | arrest_count |
|---|---|---|---|
| 0 | Abell | POLYGON ((-76.61113 39.32345, -76.61060 39.323... | 58 |
| 1 | Allendale | POLYGON ((-76.67263 39.29184, -76.67330 39.291... | 296 |
| 2 | Arcadia | POLYGON ((-76.56853 39.33595, -76.56859 39.336... | 75 |
| 3 | Arlington | POLYGON ((-76.68627 39.34791, -76.68601 39.347... | 682 |
| 4 | Armistead Gardens | POLYGON ((-76.55880 39.30646, -76.55860 39.306... | 142 |
| ... | ... | ... | ... |
| 255 | Yale Heights | POLYGON ((-76.69292 39.27249, -76.69282 39.272... | 81 |
| 256 | Belvedere | POLYGON ((-76.60270 39.35934, -76.60263 39.359... | 32 |
| 257 | Belair-Edison | POLYGON ((-76.57629 39.32211, -76.57579 39.321... | 1485 |
| 258 | Four By Four | POLYGON ((-76.57995 39.31617, -76.58027 39.315... | 102 |
| 259 | Charles Village | POLYGON ((-76.61539 39.31791, -76.61539 39.317... | 367 |
260 rows × 3 columns
Much of the following code for formatting this Folium map is sourced from Navid Mashinchi's article "How to Step Up your Folium Choropleth Map skills". Source
# Plotting a map of Baltimore using folium
map_osm = folium.Map(location=[39.29, -76.61], zoom_start=11)
# Adding choropleth "heat map" of arrest count by neighborhood.
# merged_df supplies both the shapes (geo_data) and the values (data); key_on
# names the GeoJSON feature property that is matched against columns[0].
folium.Choropleth(
    geo_data=merged_df,
    data=merged_df,
    columns=['neighborhood', 'arrest_count'],
    key_on='feature.properties.neighborhood',
    fill_color='YlOrRd',
    fill_opacity=0.6,
    line_opacity=0.2,
    legend_name="arrests",
    smooth_factor=0,
    # The keyword is lowercase `highlight`; the previous spelling `Highlight`
    # is not a Choropleth parameter, so the option was never applied.
    highlight=True,
    line_color="#414a4c",
    name="Arrests by Baltimore Neighborhood",
    show=True,
    overlay=True,
    nan_fill_color="White",
).add_to(map_osm)
# Hover layer: a nearly transparent GeoJson overlay drawn over the map that
# darkens a neighborhood under the cursor and shows its name and arrest count.
def style_function(feature):
    """Return the resting style applied to every neighborhood shape."""
    return {
        'fillColor': '#ffffff',
        'color': '#000000',
        'fillOpacity': 0.1,
        'weight': 0.1,
    }


def highlight_function(feature):
    """Return the style applied to a shape while the cursor is over it."""
    return {
        'fillColor': '#000000',
        'color': '#000000',
        'fillOpacity': 0.50,
        'weight': 0.1,
    }


hover_tooltip = folium.features.GeoJsonTooltip(
    fields=['neighborhood', 'arrest_count'],
    aliases=['Neighborhood', 'Arrest Count'],
    style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px"),
)
HOV = folium.features.GeoJson(
    data=merged_df,
    style_function=style_function,
    control=False,
    highlight_function=highlight_function,
    tooltip=hover_tooltip,
)
# Attach the overlay and keep it in front of the other layers
map_osm.add_child(HOV)
map_osm.keep_in_front(HOV)
map_osm